##################################################################################
#                         GOV 2001: REPLICATION PROJECT                          #
#  Replication Article: Lipsmeyer, Christine S. & Zhu, Ling (2011)  -            #
# "IMMIGRATION, GLOBALIZATION, AND UNEMPLOYMENT BENEFITS IN DEVELOPED EU STATES" #
#                       AJPS Vol. 55(3), Pp. 647-664                             #
#              Replication Authors: Diana Draghici & Jerome Hughes               #
#                       File Version: Tue, 24 Apr 2012                           #     
##################################################################################


#################################################################################
#           GENERATE JOINT DATA SET BASED ON ORIGINAL FILES                     #    
#   [Downloaded from the sources indicated by the authors in the article]       #  
#################################################################################

### Note: The following code is based on the assumption that the user has all  
  # original datasets saved in a unique folder, specified as the working directory


#################################################################################
#       Preliminaries                                                           #                   
#################################################################################


## Set working directory 
setwd("//Users/DianaDraghici//Documents//HARVARD UNIVERSITY//Courses//Spring Semester 2012//GOV 2001//Replication Project//Lipsmeyer&Zhu_AJPS_2011//Additional Data")

## Load libraries
library(foreign)
library(XLConnect) # not available in version 2.15


#################################################################################
# (1) Generate initial data matrix = 15 countries x 37 years (1971-2007)        #                                                              
#################################################################################

tscs <- cbind(rep(1:15, each=37),  rep(1971:2007,15))
colnames(tscs) <- c("state","year")


#################################################################################
# (2) Unemployment Benefits: OECD Unemployment Benefits Data                    #                                                              
#################################################################################

OECD_UB <- read.table("OECD_BenEnt6107.csv", sep = ",", header = TRUE, 
colClasses=c("character",rep("numeric",24)), na.strings = "NA", stringsAsFactors=F)
class(OECD_UB[,3])

colnames(OECD_UB)[-1] <- seq(1961,2007,2)

## Reshape subsetted data from wide to long format
Data1 <- reshape(OECD_UB, direction="long", idvar="X", timevar="year", v.names="Entitlement",
varying = colnames(OECD_UB)[-1], times = colnames(OECD_UB)[-1])


CNamVec <- c("Austria", "Belgium", "Denmark", "Finland", "France", 
           "Germany", "Greece", "Ireland", "Italy", "Netherlands", 
           "Norway", "Portugal", "Spain", "Sweden", "UK")
CNumVec <- 1:15   


## Establish correspondence betw/ country name & number, 
   # while generating 'state' variable to enable merge
Data1$state <- NA
for(i in 1:15){
     Data1[which(Data1[,"X"]==CNamVec[i]),]$state <- CNumVec[i]
     } 

## Keep only  countries of interest
Data1 <- Data1[complete.cases(Data1[,"state"]),]

## Keep only years >=1971
Data1 <- subset(Data1, year >=1971)


## Sort by state, year
Data1 <- Data1[order(Data1$state, Data1$year),]

 
## Merge with TSCS dataframe
Data2 <- merge(tscs, Data1, by= c("state","year"), all=TRUE, incomparables=NA)
Data2$X <- NULL # remove superfluous X column


#################################################################################
# (3) Immigration: OECD International Migration Statitics                       #                                                              
#################################################################################

### Read in Dataset for Inflow of Immigrants, removing superfluous columns
Imm_In <- read.table("OECD_IMD_Inflows_7507.csv", sep = ",", header = TRUE)[-c(1:3,7)]

## Assign informative column name to variable of interest
colnames(Imm_In)[3] <- "Imm_Inflows"

## Change name of variable 'Year' to lowercase, to enable merge
colnames(Imm_In)[2] <- "year"

## Define vectors of country names & country numbers
CNamVec <- levels(Imm_In$Country)
CNumVec <- 1:15      

## Establish correspondence betw/ country name & number, 
   # while generating 'state' variable to enable merge
Imm_In$state <- NA
for(i in 1:15){
     Imm_In[which(Imm_In[,"Country"]==CNamVec[i]),]$state <- CNumVec[i]
     }     

### Read in Dataset for Outflow of Immigrants, removing superfluous columns
Imm_Out <- read.table("OECD_IMD_Outflows_7507.csv", sep = ",", header = TRUE)[-c(1:3,7)]

## Assign informative column name to variable of interest
colnames(Imm_Out)[3] <- "Imm_Outflows"

## Change name of variable 'Year' to lowercase, to enable merge
colnames(Imm_Out)[2] <- "year"

## Define vectors of country names & country numbers
CNamVec2 <- levels(Imm_Out$Country)
CNumVec2 <- match(CNamVec2, CNamVec)
 ## Note: No data for certain countries, keeping only numbers
  # corresponding to countries w/ available data

## Establish correspondence betw/ country name & number, 
   # while generating 'state' variable to enable merge
Imm_Out$state <- NA
for(i in 1:length(CNumVec2)) {
     Imm_Out[which(Imm_Out[,"Country"]==CNamVec2[i]),]$state <- CNumVec2[i]
     } 
     
### Merge datasets w/ dataset created @ previous step

Data3 <- merge(Data2, Imm_In, by= c("state","year"), all=TRUE)  
Data4 <- merge(Data3, Imm_Out, by= c("state","year"), all=TRUE)  

### Remove superfluous columns
Data4$Country.x <- Data4$Country.y <- NULL



#################################################################################
# (4) Labor Market Integration: OECD Unit Labor Cost Statitics                  #                                                              
#################################################################################

### Read in Dataset for Unit Labor Cost, removing superfluous columns
ULC <- read.table("OECD_UnitLabCost_7107.csv", sep = ",", header = TRUE)[-c(1:4,8)]

## Assign informative column name to variable of interest
colnames(ULC)[3] <- "UnitLabCost"

## Change name of variable 'Time' to 'year', to enable merge
colnames(ULC)[2] <- "year"

## Establish correspondence betw/ country name & number, 
   # while generating 'state' variable to enable merge
   # (data available for all countries, and same names on 'Country' variable
   # so vector of country names & numbers can be re-used)
ULC$state <- NA
for(i in 1:15){
     ULC[which(ULC[,"Country"]==CNamVec[i]),]$state <- CNumVec[i]
     }     

## Merge dataset w/ dataset created @ previous step
Data5 <- merge(Data4, ULC, by= c("state","year"), all=TRUE, sort=FALSE)  

### Remove superfluous column
Data5$Country <- NULL



#################################################################################
# (5) FDI, Foreign Trade, Economic Controls: Penn World Tables v. 6.3           #                                                              
#################################################################################

### Read in Penn World Table 6.3 Dataset 
PWT <- read.table("PWT_6.3_7107.csv", sep = ",", header = TRUE)


## Keep variables of interest
 # & non-collinear variables w/ non-missing values, to aid imputation process

PWT2 <- subset(PWT, select=c("country", "year", "POP", "cgdp", "ci", "openk"))


## Establish correspondence betw/ country name & number, 
   # while generating 'state' variable to enable merge
   # (data available for all countries, and same names on 'country' variable
   # so vector of country names & numbers can be re-used)
PWT2$state <- NA
for(i in 1:15){
     PWT2[which(PWT2[,"country"]==CNamVec[i]),]$state <- CNumVec[i]
     }     


## Merge dataset w/ dataset created @ previous step
Data6 <- merge(Data5, PWT2, by= c("state","year"), all=TRUE, sort=FALSE)  

### Remove superfluous columns
Data6$country <- NULL

#########################################################################################
### (6) Convert ICTWSS Database to .csv files corresponding to each country of interest
#########################################################################################
    # Note: This database is in Excel (.xls) format, 
            # with multiple sheets, each corresponding to a different country

## Read in Excel file & define workbook (required by XLConnect package)
excelFile <- ("ICTWSS_Database_30.xls")
wb <- loadWorkbook(excelFile)

## Define: # (1) vector of .xls worksheet names as they appear in the ITCWSS database
           # (2) vector of country names to be assigned to generated .csv files
CntVec <- c("AT", "BE", "DK", "FI", "FR", "DE", "EL", "IE", "IT", "NL", 
            "NO", "PT", "SE", "ES", "UK")
CNamVec <- c("Austria", "Belgium", "Denmark", "Finland", "France", 
           "Germany", "Greece", "Ireland", "Italy", "Netherlands", 
           "Norway", "Portugal", "Spain", "Sweden", "United Kingdom")

## Loop thru relevant Excel worksheets to convert to .csv format
   # Notes: # start @ 2nd row, as 1st row empty
            # 1st row after header removed b/c empty
for (i in 1:length(CntVec)) {
data <- readWorksheet(wb, sheet = i, startRow=2, header = TRUE)[-1,]
        write.csv(data, file=paste("ICTWSS", CNamVec[i], "csv", sep="."), row.names=FALSE) 
        }
        
#####################
        
### Import ICTWSS data into R

ICTWSS <- matrix(NA,555,3)
colnames(ICTWSS) <- c("state","year","UD")

for (i in 1:length(CNamVec)){
	Dat <- read.table(paste("ICTWSS", CNamVec[i], "csv", sep="."), sep = ",", header = TRUE) 
     colnames(Dat)[1] <- "year"
     Dat$state <- i
     ICTWSS[((i-1)*37+1):(i*37), ]  <- as.matrix(Dat[12:48,c("state","year","UD")])
     }   
     
    
## Merge dataset w/ dataset created @ previous step
Data7 <- merge(Data6, ICTWSS, by= c("state","year"), all=TRUE, sort=FALSE)  



#################################################################################
# (7) Comparative Political Parties Data Set                                    #                                                              
#################################################################################

Parties <- read.table("PolParties5006.csv", sep = ",", header = TRUE)

## Select variable & years of interest
Parties2 <- subset(Parties, YEAR >=1971, select=c("COID","YEAR","LEFTGS"))

## Recode -999 as NA
Parties2[which(Parties2[,"LEFTGS"]== -999), "LEFTGS"] <- NA

## From Swank's codebook: 
#COID Country identifier: 1 for Australia, 2 for Austria, 3 for Belgium, 4 for Canada, 5 for Denmark, 6 for Finland, 7 for France, 8 for West Germany, 9 for Ireland, 10 for Italy, 11 for Japan, 12 for Netherlands, 13 for New Zealand, 14 for Norway, 15 for Sweden, 16 for Switzerland, 17 for the United Kingdom, 18 for the United States, 19 for Greece, 20 for Portugal, and 21 for Spain.
###

KeepVec <- c(2,3,5,6,7,8,19,9,10,12,14,20,21,15,17) #based on codebook excerpt above

## Establish correspondence betw/ country name & COID, 
   # while generating 'state' variable to enable merge
Parties2$state <- NA
for(i in 1:15){
    Parties2[which(Parties2[,"COID"]== KeepVec[i]), ]$state <- CNumVec[i]
  }
  
## Keep only observations with non-missing on the state variable
 # to select only countries of interest
Parties3 <- Parties2[complete.cases(Parties2[,"state"]),]

## Sort by state, while removing COID column
Parties4 <- Parties3[order(Parties3$state),-1]

## Rename YEAR year to enable merge
colnames(Parties4)[1] <- "year"


## Merge dataset w/ dataset created @ previous step
Data8 <- merge(Data7, Parties4, by= c("state","year"), all=TRUE, sort=FALSE)  


#################################################################################
# (8) Golden, Lange & Wallerstein Data on Union Density                         #                                                              
#################################################################################

GLW <- read.table("GLW_union_centralization.csv", sep = ",", header = TRUE)[,c(2,3,6)]


CodeVec <- c("AUT","BEL", "DNK", "FIN", "FRA", "GER", "IRE", "ITA", "NTH",
"NOR", "POR", "SPN", "SWE", "UK")
# Note: Greece (state=7) not included in the dataset
NVec <- c(1:6,8:15)


## Establish correspondence betw/ country code & state number, 
   # while generating 'state' variable to enable merge
GLW$state <- NA
for(i in 1:length(NVec)){
    GLW[which(GLW[,"country"]== CodeVec[i]), ]$state <- NVec[i]
  }

## Keep only observations with non-missing on the state variable
 # to select only countries of interest
GLW2 <- GLW[complete.cases(GLW[,"state"]),]
summary(GLW2)

## Keep only years >=1971
GLW3 <- subset(GLW2, year >=1971)


## Merge dataset w/ dataset created @ previous step
Data9 <- merge(Data8, GLW3, by= c("state","year"), all=TRUE, sort=FALSE)  


## Sort by state
Data10 <- Data9[order(Data9$state, Data9$year),]


### Remove superfluous column
Data10$country <- NULL
dim(Data10)

#################################################################################
# (9) Unemployment Rates                                                   #                                                              
#################################################################################

Unempl <- read.table("Unempl7107.csv", sep = ",", header = TRUE)[,c("Country","Time", "Value")]
CNamVec <- c("Austria", "Belgium", "Denmark", "Finland", "France", 
           "Germany", "Greece", "Ireland", "Italy", "Netherlands", 
           "Norway", "Portugal", "Spain", "Sweden", "United Kingdom")
CNumVec <- 1:15

## Establish correspondence betw/ country code & state number, 
   # while generating 'state' variable to enable merge
Unempl$state <- NA
for(i in 1:length(CNamVec)){
    Unempl[which(Unempl[,"Country"]== CNamVec[i]), ]$state <- CNumVec[i]
  }

## Rename variables
colnames(Unempl)[2] <- "year"
colnames(Unempl)[3] <- "UnemplRate"


## Merge dataset w/ dataset created @ previous step
Data11 <- merge(Data10, Unempl, by= c("state","year"), all=TRUE, sort=FALSE)  


## Sort by state, year
Data12 <- Data11[order(Data11$state, Data11$year),]


### Remove superfluous column
Data12$Country <- NULL
dim(Data12)


#################################################################################
# (10) Authors' measures that were more complete & widely different             #                                                              
#################################################################################

ajps1 <- read.dta("AJPS Immigration Data.dta")
ajps2 <- subset(ajps1, select=c("state", "year", "netimmrate", "laborcost_new"))


## Merge dataset w/ dataset created @ previous step
Data13 <- merge(Data12, ajps2, by= c("state","year"), all=TRUE, sort=FALSE)  

## Sort by state, year
Data14 <- Data13[order(Data13$state, Data13$year),]
dim(Data14)
head(Data14)

#########################################

### Final Joint Dataset

AJPS2011REPL <- Data14[,-c(4,5)]
head(AJPS2011REPL)

write.table(AJPS2011REPL, file="AJPS2011REPL.csv", col.names=TRUE, sep=",")
write.dta(AJPS2011REPL, file="AJPS2011REPL.dta")


########################################################################################
